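'''
Read the metadata of a PDF file with PyPDF2.
PyPDF2 is not well suited to reading the body text of a PDF; the PDFMiner
library is designed specifically for reading PDF files, so PDFMiner is
recommended for that purpose.
'''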

from PyPDF2 import PdfFileReader

def read_pdf(pdfpath):
    """Print the text and metadata of a PDF file and return its metadata.

    The extracted text of every page is printed first, followed by a short
    summary (author, creator, producer, subject, title, page count).

    Args:
        pdfpath: Path of the PDF file to read.

    Returns:
        The object returned by ``PdfFileReader.getDocumentInfo()``; this is
        ``None`` when the PDF carries no document information dictionary.
    """
    with open(pdfpath, 'rb') as f:
        pdf = PdfFileReader(f)
        pdf_info = pdf.getDocumentInfo()
        num_pages = pdf.getNumPages()
        # extractText() is a method of a page, not of the reader, so the
        # text is gathered page by page while the file is still open.
        text = "\n".join(
            pdf.getPage(index).extractText() for index in range(num_pages)
        )
        print(text)

    def field(name):
        # pdf_info is None for PDFs without an info dictionary; report the
        # field as None instead of failing on the attribute access.
        return getattr(pdf_info, name, None)

    txt = f"""
    Information about {pdfpath}: 

    Author: {field('author')}
    Creator: {field('creator')}
    Producer: {field('producer')}
    Subject: {field('subject')}
    Title: {field('title')}
    Number of pages: {num_pages}
    """

    print(txt)
    return pdf_info

# Demo run: read the sample PDF (path is relative to the current working
# directory) and show the metadata object that read_pdf() returns.
pdf_info = read_pdf(pdfpath='岳阳楼记.pdf')
print(pdf_info)